Single-cell RNA sequencing analysis of E16.5 WT vs. Bgn KO limbs

“Biglycan regulates bone development and regeneration” Reut Shainer, Vardit Kram, Tina M. Kilts, Li Li, Andrew Doyle, Inbal Shainer, Carl G. Simon Jr., Liliana Schaefer, Marian F. Young. 2022

Author: Inbal Shainer


WT and Bgn KO embryos’ bones were dissected and dissociated. Each sample was splitted into two samples that were uploaded separately on the 10x chromium chip. The CellRanger filtered cell-gene matrix was used for the downstream analysis described here.

#load the required packages
library(dplyr)
library(Seurat)
library(Matrix)
library(ggplot2)
library(reticulate)
library(SeuratWrappers)
library(harmony)

Seuart objects were created for the WT and Bgn KO samples. All the samples were merged into a single Seurat object. The genotype was added as metadata.

#create seurat object for ko
ko1E16.5_data <- Read10X(data.dir = "~/Desktop/scSeq_Reut/E16.5/E16.5_KOA/filtered_feature_bc_matrix")
ko1E16.5 <- CreateSeuratObject(counts = ko1E16.5_data, project = "Bgn KO 1", min.cells = 3)
ko1E16.5<- AddMetaData(ko1E16.5, metadata = "Bgn KO", col.name="genotype")
ko2E16.5_data <- Read10X(data.dir = "~/Desktop/scSeq_Reut/E16.5/E16.5_KOB/filtered_feature_bc_matrix")
ko2E16.5 <- CreateSeuratObject(counts = ko2E16.5_data, project = "Bgn KO 2", min.cells = 3)
ko2E16.5<- AddMetaData(ko2E16.5, metadata = "Bgn KO", col.name="genotype")

#create seurat object for wt
wt1E16.5_data <- Read10X(data.dir = "~/Desktop/scSeq_Reut/E16.5/WT1E165/filtered_feature_bc_matrix")
wt1E16.5 <- CreateSeuratObject(counts = wt1E16.5_data, project = "WT 1", min.cells = 3)
wt1E16.5<- AddMetaData(wt1E16.5, metadata = "WT", col.name="genotype")
wt2E16.5_data <- Read10X(data.dir = "~/Desktop/scSeq_Reut/E16.5/WT2E165/filtered_feature_bc_matrix")
wt2E16.5 <- CreateSeuratObject(counts = wt2E16.5_data, project = "WT 2", min.cells = 3)
wt2E16.5<- AddMetaData(wt2E16.5, metadata = "WT", col.name="genotype")

#merged object
combined<-merge(x = ko1E16.5, y = c(ko2E16.5, wt1E16.5, wt2E16.5), add.cell.ids = c("Bgn KO 1", "Bgn KO 2", "WT 1", "WT 2"), project = "combined")
combined
## An object of class Seurat 
## 20707 features across 38635 samples within 1 assay 
## Active assay: RNA (20707 features, 0 variable features)
combined[["percent.mt"]] <- PercentageFeatureSet(object = combined, pattern = "^mt-")

Cells quality was checked by examining the distribution of cells according to the number of detected genes, UMI count and the percentage of mitochonrial gene expression.

# Visualize QC metrics as a violin plot
VlnPlot_before_filtering<- VlnPlot(object = combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3, pt.size = 0)
VlnPlot_before_filtering 

The total number of cells before filtration:

table(...=combined@active.ident)
## ...
## Bgn KO 1 Bgn KO 2     WT 1     WT 2 
##     9338    11071     8133    10093

Cells with unusual number of genes (<200), UMI count (>15000) or percentage of mitochondrial genes (>20%) are filtered out.

combined <- subset(x = combined, 
                   subset = nFeature_RNA > 200 & 
                     percent.mt < 20 & 
                     nCount_RNA < 15000)

VlnPlot_after_filtering<- VlnPlot(object = combined, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), 
                                  ncol = 3, pt.size = 0)
VlnPlot_after_filtering

The total number of cells after filtration:

# number of cells per sample
table(...=combined@active.ident)
## ...
## Bgn KO 1 Bgn KO 2     WT 1     WT 2 
##     6690     8364     6131     8650
#total number of cells
sum(table(...=combined@active.ident))
## [1] 29835

The data was then normalized by the “LogNormalization” method implemented in the Seurat package (scale factor =10,000) and scaled using Seurat’s default settings. The 2000 top variable genes were identified using the “vst” method implemented in Seurat.

combined <- NormalizeData(object = combined, 
                          normalization.method = "LogNormalize",
                          scale.factor = 10000)

combined <- FindVariableFeatures(object = combined, 
                                 selection.method = "vst", 
                                 nfeatures = 2000)

all.genes.combined <- rownames(x = combined)
combined<- ScaleData(object = combined, 
                             features = all.genes.combined)
## Centering and scaling data matrix

Linear dimensional reduction (PCA) was performed.

combined<- RunPCA(object = combined, 
                  features = VariableFeatures(object = combined))
## PC_ 1 
## Positive:  Tyrobp, Fcer1g, Laptm5, Coro1a, Fcgr3, Lcp1, Ms4a6c, Alox5ap, Rac2, Lyz2 
##     Lst1, Ctss, Ctsc, F13a1, Spi1, Fyb, Gmfg, Ptpn18, Csf1r, Cd53 
##     Cx3cr1, Cd52, Ms4a6b, Pld4, Slfn2, Ptprc, Apoe, Plek, C1qb, Srgn 
## Negative:  Sparc, Cdkn1c, Peg3, Col3a1, Dlk1, Col1a2, Itm2a, Col1a1, H19, Col6a1 
##     Fstl1, Col5a2, Ptn, Plagl1, Col6a2, Fbn2, Col6a3, Dcn, Postn, Nrk 
##     Cald1, Tuba1a, Kcnq1ot1, Ndn, Col12a1, Ogn, Plpp3, Egr1, Fbn1, Vcan 
## PC_ 2 
## Positive:  Actc1, Myog, Tnnt2, Rbm24, Ttn, Vgll2, Neb, Mymx, Tnnt1, Arpp21 
##     Mymk, Gm28653, Cdh15, Chrna1, Tnni1, Myod1, Ank1, Fitm1, Klhl41, Jsrp1 
##     Cryab, Pnmal2, Tpm2, Spg21, Cd82, Msi2, Acta2, Hspb2, Megf10, Mrln 
## Negative:  Col3a1, Col1a2, Col1a1, Dcn, Col5a2, Fstl1, Col6a3, Postn, Col6a2, Col6a1 
##     S100a6, Ptn, Akap12, Fbn2, Igf1, Igfbp4, Fn1, Col14a1, Fbn1, Plpp3 
##     Sparc, Ahnak, Vcan, Nid1, Mfap5, Agtr2, Dpysl3, Ebf1, Basp1, Itih5 
## PC_ 3 
## Positive:  Col11a1, Mia, Col2a1, Col9a2, Col9a1, Acan, Col9a3, Cnmd, Col11a2, Hapln1 
##     Comp, Matn1, Col27a1, Snorc, Cthrc1, Matn4, Scrg1, Cytl1, Mgp, Wwp2 
##     Chad, Matn3, Susd5, Fmod, Lgals3, Epyc, Ecrg4, Sox5, Fkbp11, Sox9 
## Negative:  Top2a, Mki67, Tpx2, Cenpf, Prc1, Cenpe, Cdca8, Birc5, Hmmr, Smc4 
##     Nusap1, Pclaf, Hmgb2, Ccna2, Smc2, Kif11, Ube2c, Incenp, Stmn1, Knl1 
##     Cdca3, Ccnb1, Racgap1, Cks2, Tuba1b, Spc25, Cdk1, H2afz, Anln, Kif20b 
## PC_ 4 
## Positive:  Acan, Col9a3, Col9a1, Cnmd, Col2a1, Hapln1, Col9a2, Col11a2, Mia, Matn1 
##     Comp, Col11a1, Snorc, Matn3, Susd5, Wwp2, Matn4, Scrg1, Epyc, Sox9 
##     Cytl1, Chad, Sox5, Ncmap, Papss2, Fgfr3, Col27a1, Sdc4, Sorbs2, Chst11 
## Negative:  Tmsb4x, Actc1, Igf1, Tnnt2, Tnnt1, Myog, Mymx, Tnni1, Agtr2, Akap12 
##     Ttn, Postn, Clec3b, Mymk, Fndc5, Des, Tpm2, Neb, Mfap5, Myl1 
##     Rbm24, Klhl41, Dbn1, Mylpf, Igfbp4, Arpp21, Chrna1, Ogn, Gm28653, Pnmal2 
## PC_ 5 
## Positive:  Hdc, Cpa3, Cma1, Tpsb2, Cyp11a1, Serpinb1a, Il1rl1, Gata2, Rab27b, Maob 
##     Kit, Cd200r3, Adora3, Slc6a4, Tpsg1, Srgn, Lat2, Gpr65, Cst7, Hs3st1 
##     Csf2rb, Tpsab1, Atp8b5, Gnaz, Slc45a3, Rgs18, Smpx, Rgs13, Otulinl, Ptprcap 
## Negative:  Gatm, Cdkn1c, Mrc1, Ttn, C1qc, C1qb, C1qa, Tnnt2, Fcrls, Actc1 
##     Mymx, Myog, F13a1, Lgmn, Neb, Ms4a6b, Mymk, Csf1r, Pf4, Rbm24 
##     Tnni1, Adgre1, Stab1, Ms4a7, Itm2a, Arpp21, Gm28653, Ms4a6c, Aif1, Frmd4b

The percentage of variance explained by the different principle components was visualized by an elbow plot.

ElbowPlot(object=combined, ndims = 30)

Batch correction was performed using “Harmony” (https://www.nature.com/articles/s41592-019-0619-0).

#Run Harmony
combined <- RunHarmony(combined, group.by.vars = "orig.ident")
## Harmony 1/10
## Harmony 2/10
## Harmony 3/10
## Harmony 4/10
## Harmony 5/10
## Harmony converged after 5 iterations
## Warning: Invalid name supplied, making object name syntactically valid. New
## object name is Seurat..ProjectDim.RNA.harmony; see ?make.names for more details
## on syntax validity

Clustering analysis was performed using the harmony embedding.

combined <- RunUMAP(combined, reduction = "harmony", dims = 1:30) 
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:15:24 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:15:24 Read 29835 rows and found 30 numeric columns
## 17:15:24 Using Annoy for neighbor search, n_neighbors = 30
## 17:15:24 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:15:27 Writing NN index file to temp file /var/folders/0w/gmr72lcn777drypdcj7nl9mh3jyl0w/T//Rtmplifesl/file6ba377a97098
## 17:15:27 Searching Annoy index using 1 thread, search_k = 3000
## 17:15:34 Annoy recall = 100%
## 17:15:34 Commencing smooth kNN distance calibration using 1 thread
## 17:15:35 Initializing from normalized Laplacian + noise
## 17:15:37 Commencing optimization for 200 epochs, with 1302900 positive edges
## 17:15:51 Optimization finished
combined <- FindNeighbors(combined, reduction = "harmony", dims = 1:30) 
## Computing nearest neighbor graph
## Computing SNN
combined <- FindClusters(object = combined, resolution = 1.2)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 29835
## Number of edges: 1063646
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9009
## Number of communities: 40
## Elapsed time: 4 seconds

Visualization of the clusters using UMAP, according to sample and genotype.

DimPlot(combined, group.by = c("orig.ident"))

DimPlot(combined, group.by = c("genotype"))

Visualization of the clusters according to cluster identity.

DimPlot(object = (combined), label=TRUE, label.size = 4, pt.size = 0.5) + 
  theme(axis.title.x=element_text(size=12),
        axis.title.y=element_text(size=12),
        legend.position = "none") 

To identify the cell type, the top deferentially expressed genes for each cluster were analyzed.

combined.markers <- FindAllMarkers(object = combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.8)
combined.markers %>% group_by(cluster) %>% top_n(n = 10, wt = avg_log2FC)
## Registered S3 method overwritten by 'cli':
##   method     from         
##   print.boxx spatstat.geom
## # A tibble: 391 × 7
## # Groups:   cluster [40]
##    p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene  
##    <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr> 
##  1     0       1.43 0.964 0.487         0 0       Vcan  
##  2     0       1.27 0.944 0.403         0 0       Igf1  
##  3     0       1.24 0.967 0.571         0 0       Nrk   
##  4     0       1.23 0.531 0.131         0 0       Rbp4  
##  5     0       1.20 0.874 0.504         0 0       Ndn   
##  6     0       1.19 0.722 0.254         0 0       Smoc2 
##  7     0       1.18 0.986 0.588         0 0       Col6a3
##  8     0       1.17 0.735 0.194         0 0       Ebf2  
##  9     0       1.15 0.999 0.809         0 0       Dlk1  
## 10     0       1.12 0.898 0.419         0 0       Plpp3 
## # … with 381 more rows

Merging of over-clustered cells and renaming the clusters according to the cell type.

# rename clusters
new.cluster.ids <- c("embryonic fibroblasts", "osteoblasts 1", "skeletal progenitors",
                     "chondrocytes 2", "neuronal progenitors 1", "progenitors 1",
                     "osteoblasts 4", "osteoblasts 3", "satellite cells",
                     "skeletal muscle cells", "myoblasts", "tendon progenitors 1",
                     "neuronal progenitors 3", "endothelial cells", "progenitors 2",
                     "M1 macrophages", "stroma cells", "erythrocytes progenitors 1",
                     "monocytes", "mast cells", "Schwann cells",
                     "erythrocytes progenitors 2", "osteoblasts 2", "chondrocytes 4",
                     "smooth muscle cells", "neuronal progenitors 2", "M2 macrophages",
                     "articular cartilage cells", "tendon progenitors 2", "erythroid-like precursors",
                     "keratinocytes", "chondrocytes 3", "chondrocytes 1",
                     "skeletal muscle cells 1", "skeletal muscle cells 2", "neutrophils",
                     "lymphoid progenitors", "osteoblasts/ chondrocytes", "mast cells",
                     "myoblasts")
names(new.cluster.ids) <- levels(combined)
combined <- RenameIdents(combined, new.cluster.ids)

DimPlot(object = (combined), label=TRUE, label.size = 4, pt.size = 0.5) + 
  theme(axis.title.x=element_text(size=12),
        axis.title.y=element_text(size=12),
        legend.position = "none") 

Reanalyze the cluster markers after the merging of the over-clustered cells.

combined.markers <- FindAllMarkers(object = combined, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.8)

What is the contribution of the different genotypes to each cell type? Are certain cell types exist more in one genotype and not the other? The number of cells for each cluster according to the genotype and technical replica was claculated.

pc.cell.in.cluster<-as.data.frame(table(combined@active.ident, combined@meta.data$orig.ident))
pc.cell.in.cluster
##                           Var1     Var2 Freq
## 1        embryonic fibroblasts Bgn KO 1 1061
## 2                osteoblasts 1 Bgn KO 1  567
## 3         skeletal progenitors Bgn KO 1  402
## 4               chondrocytes 2 Bgn KO 1  281
## 5       neuronal progenitors 1 Bgn KO 1  449
## 6                progenitors 1 Bgn KO 1  347
## 7                osteoblasts 4 Bgn KO 1  112
## 8                osteoblasts 3 Bgn KO 1  176
## 9              satellite cells Bgn KO 1  347
## 10       skeletal muscle cells Bgn KO 1  303
## 11                   myoblasts Bgn KO 1  348
## 12        tendon progenitors 1 Bgn KO 1  195
## 13      neuronal progenitors 3 Bgn KO 1   64
## 14           endothelial cells Bgn KO 1  160
## 15               progenitors 2 Bgn KO 1  173
## 16              M1 macrophages Bgn KO 1  143
## 17                stroma cells Bgn KO 1  142
## 18  erythrocytes progenitors 1 Bgn KO 1  138
## 19                   monocytes Bgn KO 1   90
## 20                  mast cells Bgn KO 1   96
## 21               Schwann cells Bgn KO 1   75
## 22  erythrocytes progenitors 2 Bgn KO 1  125
## 23               osteoblasts 2 Bgn KO 1   88
## 24              chondrocytes 4 Bgn KO 1   39
## 25         smooth muscle cells Bgn KO 1   54
## 26      neuronal progenitors 2 Bgn KO 1   93
## 27              M2 macrophages Bgn KO 1   62
## 28   articular cartilage cells Bgn KO 1   59
## 29        tendon progenitors 2 Bgn KO 1   54
## 30   erythroid-like precursors Bgn KO 1   93
## 31               keratinocytes Bgn KO 1    7
## 32              chondrocytes 3 Bgn KO 1   54
## 33              chondrocytes 1 Bgn KO 1   60
## 34     skeletal muscle cells 1 Bgn KO 1   65
## 35     skeletal muscle cells 2 Bgn KO 1   46
## 36                 neutrophils Bgn KO 1   46
## 37        lymphoid progenitors Bgn KO 1   40
## 38   osteoblasts/ chondrocytes Bgn KO 1   36
## 39       embryonic fibroblasts Bgn KO 2 1208
## 40               osteoblasts 1 Bgn KO 2  608
## 41        skeletal progenitors Bgn KO 2  396
## 42              chondrocytes 2 Bgn KO 2  304
## 43      neuronal progenitors 1 Bgn KO 2  473
## 44               progenitors 1 Bgn KO 2  764
## 45               osteoblasts 4 Bgn KO 2   68
## 46               osteoblasts 3 Bgn KO 2  230
## 47             satellite cells Bgn KO 2  464
## 48       skeletal muscle cells Bgn KO 2  384
## 49                   myoblasts Bgn KO 2  398
## 50        tendon progenitors 1 Bgn KO 2  276
## 51      neuronal progenitors 3 Bgn KO 2   64
## 52           endothelial cells Bgn KO 2  234
## 53               progenitors 2 Bgn KO 2  300
## 54              M1 macrophages Bgn KO 2  164
## 55                stroma cells Bgn KO 2  180
## 56  erythrocytes progenitors 1 Bgn KO 2  138
## 57                   monocytes Bgn KO 2  148
## 58                  mast cells Bgn KO 2  141
## 59               Schwann cells Bgn KO 2  105
## 60  erythrocytes progenitors 2 Bgn KO 2  118
## 61               osteoblasts 2 Bgn KO 2   98
## 62              chondrocytes 4 Bgn KO 2   69
## 63         smooth muscle cells Bgn KO 2   65
## 64      neuronal progenitors 2 Bgn KO 2  100
## 65              M2 macrophages Bgn KO 2   96
## 66   articular cartilage cells Bgn KO 2   87
## 67        tendon progenitors 2 Bgn KO 2   75
## 68   erythroid-like precursors Bgn KO 2   74
## 69               keratinocytes Bgn KO 2   18
## 70              chondrocytes 3 Bgn KO 2   31
## 71              chondrocytes 1 Bgn KO 2   55
## 72     skeletal muscle cells 1 Bgn KO 2  121
## 73     skeletal muscle cells 2 Bgn KO 2  125
## 74                 neutrophils Bgn KO 2  103
## 75        lymphoid progenitors Bgn KO 2   44
## 76   osteoblasts/ chondrocytes Bgn KO 2   38
## 77       embryonic fibroblasts     WT 1  445
## 78               osteoblasts 1     WT 1  423
## 79        skeletal progenitors     WT 1  494
## 80              chondrocytes 2     WT 1  476
## 81      neuronal progenitors 1     WT 1  267
## 82               progenitors 1     WT 1  203
## 83               osteoblasts 4     WT 1  608
## 84               osteoblasts 3     WT 1  432
## 85             satellite cells     WT 1  128
## 86       skeletal muscle cells     WT 1  147
## 87                   myoblasts     WT 1  128
## 88        tendon progenitors 1     WT 1  108
## 89      neuronal progenitors 3     WT 1  320
## 90           endothelial cells     WT 1  169
## 91               progenitors 2     WT 1   96
## 92              M1 macrophages     WT 1  142
## 93                stroma cells     WT 1  116
## 94  erythrocytes progenitors 1     WT 1  179
## 95                   monocytes     WT 1  108
## 96                  mast cells     WT 1  116
## 97               Schwann cells     WT 1   97
## 98  erythrocytes progenitors 2     WT 1   90
## 99               osteoblasts 2     WT 1   83
## 100             chondrocytes 4     WT 1  102
## 101        smooth muscle cells     WT 1  102
## 102     neuronal progenitors 2     WT 1   59
## 103             M2 macrophages     WT 1   80
## 104  articular cartilage cells     WT 1   54
## 105       tendon progenitors 2     WT 1   30
## 106  erythroid-like precursors     WT 1   34
## 107              keratinocytes     WT 1  124
## 108             chondrocytes 3     WT 1   68
## 109             chondrocytes 1     WT 1   31
## 110    skeletal muscle cells 1     WT 1    4
## 111    skeletal muscle cells 2     WT 1    9
## 112                neutrophils     WT 1   10
## 113       lymphoid progenitors     WT 1   22
## 114  osteoblasts/ chondrocytes     WT 1   27
## 115      embryonic fibroblasts     WT 2  671
## 116              osteoblasts 1     WT 2  646
## 117       skeletal progenitors     WT 2  829
## 118             chondrocytes 2     WT 2  691
## 119     neuronal progenitors 1     WT 2  424
## 120              progenitors 1     WT 2  274
## 121              osteoblasts 4     WT 2  697
## 122              osteoblasts 3     WT 2  596
## 123            satellite cells     WT 2  171
## 124      skeletal muscle cells     WT 2  220
## 125                  myoblasts     WT 2  211
## 126       tendon progenitors 1     WT 2  318
## 127     neuronal progenitors 3     WT 2  419
## 128          endothelial cells     WT 2  184
## 129              progenitors 2     WT 2  147
## 130             M1 macrophages     WT 2  181
## 131               stroma cells     WT 2  175
## 132 erythrocytes progenitors 1     WT 2   97
## 133                  monocytes     WT 2  123
## 134                 mast cells     WT 2  184
## 135              Schwann cells     WT 2  134
## 136 erythrocytes progenitors 2     WT 2   77
## 137              osteoblasts 2     WT 2  121
## 138             chondrocytes 4     WT 2  167
## 139        smooth muscle cells     WT 2  121
## 140     neuronal progenitors 2     WT 2   86
## 141             M2 macrophages     WT 2   99
## 142  articular cartilage cells     WT 2  115
## 143       tendon progenitors 2     WT 2  107
## 144  erythroid-like precursors     WT 2   44
## 145              keratinocytes     WT 2   78
## 146             chondrocytes 3     WT 2   72
## 147             chondrocytes 1     WT 2   66
## 148    skeletal muscle cells 1     WT 2   10
## 149    skeletal muscle cells 2     WT 2   10
## 150                neutrophils     WT 2   12
## 151       lymphoid progenitors     WT 2   54
## 152  osteoblasts/ chondrocytes     WT 2   19

Visualization of the contribution of the different genotypes to each cell type.

DimPlot(object = (combined), pt.size = 0.5, split.by = "genotype") + 
  theme(axis.title.x=element_text(size=12),
        axis.title.y=element_text(size=12)) 

Expression of Bgn:

FeaturePlot(combined, features = c("Bgn"))

Splitted by genotype:

FeaturePlot(combined, features = c("Bgn"), split.by = "genotype") + theme(legend.position="right")

VlnPlot(combined, features = c("Bgn"), split.by = "genotype", pt.size = 0)
## The default behaviour of split.by has changed.
## Separate violin plots are now plotted side-by-side.
## To restore the old behaviour of a single split violin,
## set split.plot = TRUE.
##       
## This message will be shown once per session.